1   package org.apache.solr.handler.component;
2   
3   import java.util.ArrayList;
4   import java.util.Arrays;
5   import java.util.List;
6   
7   import org.apache.solr.SolrTestCaseJ4;
8   import org.apache.solr.common.params.TermVectorParams;
9   import org.junit.BeforeClass;
10  import org.junit.Test;
11  /*
12   * Licensed to the Apache Software Foundation (ASF) under one or more
13   * contributor license agreements.  See the NOTICE file distributed with
14   * this work for additional information regarding copyright ownership.
15   * The ASF licenses this file to You under the Apache License, Version 2.0
16   * (the "License"); you may not use this file except in compliance with
17   * the License.  You may obtain a copy of the License at
18   *
19   *     http://www.apache.org/licenses/LICENSE-2.0
20   *
21   * Unless required by applicable law or agreed to in writing, software
22   * distributed under the License is distributed on an "AS IS" BASIS,
23   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24   * See the License for the specific language governing permissions and
25   * limitations under the License.
26   */
27  
28  
29  /**
30   *
31   *
32   **/
33  public class TermVectorComponentTest extends SolrTestCaseJ4 {
34    @BeforeClass
35    public static void beforeClass() throws Exception {
36      initCore("solrconfig.xml","schema.xml");
37  
38      assertU(adoc("id", "0",
39              "test_posoffpaytv", "This is a title and another title",
40              "test_posofftv", "This is a title and another title",
41              "test_basictv", "This is a title and another title",
42              "test_notv", "This is a title and another title",
43              "test_postv", "This is a title and another title",
44              "test_offtv", "This is a title and another title"
45      ));
46      assertU(adoc("id", "1",
47              "test_posoffpaytv", "The quick reb fox jumped over the lazy brown dogs.",
48              "test_posofftv", "The quick reb fox jumped over the lazy brown dogs.",
49              "test_basictv", "The quick reb fox jumped over the lazy brown dogs.",
50              "test_notv", "The quick reb fox jumped over the lazy brown dogs.",
51              "test_postv", "The quick reb fox jumped over the lazy brown dogs.",
52              "test_offtv", "The quick reb fox jumped over the lazy brown dogs."
53      ));
54      assertU(adoc("id", "2",
55              "test_posoffpaytv", "This is a document",
56              "test_posofftv", "This is a document",
57              "test_basictv", "This is a document",
58              "test_notv", "This is a document",
59              "test_postv", "This is a document",
60              "test_offtv", "This is a document"
61      ));
62      assertU(adoc("id", "3",
63              "test_posoffpaytv", "another document",
64              "test_posofftv", "another document",
65              "test_basictv", "another document",
66              "test_notv", "another document",
67              "test_postv", "another document",
68              "test_offtv", "another document"
69      ));
70      //bunch of docs that are variants on blue
71      assertU(adoc("id", "4",
72              "test_posoffpaytv", "blue",
73              "test_posofftv", "blue",
74              "test_basictv", "blue",
75              "test_notv", "blue",
76              "test_postv", "blue",
77              "test_offtv", "blue"
78      ));
79      assertU(adoc("id", "5",
80              "test_posoffpaytv", "blud",
81              "test_posofftv", "blud",
82              "test_basictv", "blud",
83              "test_notv", "blud",
84              "test_postv", "blud",
85              "test_offtv", "blud"
86      ));
87      assertU(adoc("id", "6",
88              "test_posoffpaytv", "boue",
89              "test_posofftv", "boue",
90              "test_basictv", "boue",
91              "test_notv", "boue",
92              "test_postv", "boue",
93              "test_offtv", "boue"
94      ));
95      assertU(adoc("id", "7",
96              "test_posoffpaytv", "glue",
97              "test_posofftv", "glue",
98              "test_basictv", "glue",
99              "test_notv", "glue",
100             "test_postv", "glue",
101             "test_offtv", "glue"
102     ));
103     assertU(adoc("id", "8",
104             "test_posoffpaytv", "blee",
105             "test_posofftv", "blee",
106             "test_basictv", "blee",
107             "test_notv", "blee",
108             "test_postv", "blee",
109             "test_offtv", "blee"
110     ));
111     assertU(adoc("id", "9",
112             "test_posoffpaytv", "blah",
113             "test_posofftv", "blah",
114             "test_basictv", "blah",
115             "test_notv", "blah",
116             "test_postv", "blah",
117             "test_offtv", "blah"
118     ));
119 
120     assertNull(h.validateUpdate(commit()));
121   }
122 
123   static String tv = "tvrh";
124 
125   @Test
126   public void testBasics() throws Exception {
127     assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true", TermVectorParams.TF, "true")
128        ,"/termVectors=={'0':{'uniqueKey':'0'," +
129             " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
130             " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
131             " 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
132             " 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
133             " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
134             " 'uniqueKeyFieldName':'id'}"
135     );
136     // tv.fl diff from fl
137     assertJQ(req("json.nl","map", 
138                  "qt",tv, 
139                  "q", "id:0", 
140                  "fl", "*,score",
141                  "tv.fl", "test_basictv,test_offtv",
142                  TermVectorComponent.COMPONENT_NAME, "true", 
143                  TermVectorParams.TF, "true")
144        ,"/termVectors=={'0':{'uniqueKey':'0'," +
145             " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
146             " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
147             " 'uniqueKeyFieldName':'id'}"
148     );
149     // multi-valued tv.fl 
150     assertJQ(req("json.nl","map", 
151                  "qt",tv, 
152                  "q", "id:0", 
153                  "fl", "*,score",
154                  "tv.fl", "test_basictv",
155                  "tv.fl","test_offtv",
156                  TermVectorComponent.COMPONENT_NAME, "true", 
157                  TermVectorParams.TF, "true")
158        ,"/termVectors=={'0':{'uniqueKey':'0'," +
159             " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
160             " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
161             " 'uniqueKeyFieldName':'id'}"
162     );
163     // re-use fl glob
164     assertJQ(req("json.nl","map", 
165                  "qt",tv, 
166                  "q", "id:0", 
167                  "fl", "*,score",
168                  TermVectorComponent.COMPONENT_NAME, "true", 
169                  TermVectorParams.TF, "true")
170        ,"/termVectors=={'0':{'uniqueKey':'0'," +
171             " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
172             " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
173             " 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
174             " 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
175             " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
176             " 'uniqueKeyFieldName':'id'}"
177     );
178     // re-use fl, ignore things we can't handle
179     assertJQ(req("json.nl","map", 
180                  "qt",tv, 
181                  "q", "id:0", 
182                  "fl", "score,test_basictv,[docid],test_postv,val:sum(3,4)",
183                  TermVectorComponent.COMPONENT_NAME, "true", 
184                  TermVectorParams.TF, "true")
185        ,"/termVectors=={'0':{'uniqueKey':'0'," +
186             " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
187             " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
188             " 'uniqueKeyFieldName':'id'}"
189     );
190     // re-use (multi-valued) fl, ignore things we can't handle
191     assertJQ(req("json.nl","map", 
192                  "qt",tv, 
193                  "q", "id:0", 
194                  "fl", "score,test_basictv",
195                  "fl", "[docid],test_postv,val:sum(3,4)",
196                  TermVectorComponent.COMPONENT_NAME, "true", 
197                  TermVectorParams.TF, "true")
198        ,"/termVectors=={'0':{'uniqueKey':'0'," +
199             " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
200             " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
201             " 'uniqueKeyFieldName':'id'}"
202     );
203 
204   }
205 
206   @Test
207   public void testOptions() throws Exception {
208     assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true"
209        , TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true")
210        ,"/termVectors/0/test_posofftv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':5}, 'df':2, 'tf-idf':0.5}"
211     );
212     
213     assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true"
214         , TermVectorParams.ALL, "true")
215         ,"/termVectors/0/test_posofftv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':5}, 'df':2, 'tf-idf':0.5}"
216      );
217     
218     // test each combination at random
219     final List<String> list = new ArrayList<>();
220     list.addAll(Arrays.asList("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true"));
221     String[][] options = new String[][] { { TermVectorParams.TF, "'tf':1" },
222         { TermVectorParams.OFFSETS, "'offsets':{'start':20, 'end':27}" },
223         { TermVectorParams.POSITIONS, "'positions':{'position':5}" },
224         { TermVectorParams.DF, "'df':2" },
225         { TermVectorParams.TF_IDF, "'tf-idf':0.5" } };
226     StringBuilder expected = new StringBuilder("/termVectors/0/test_posofftv/anoth=={");
227     boolean first = true;
228     for (int i = 0; i < options.length; i++) {
229       final boolean use = random().nextBoolean();
230       if (use) {
231         if (!first) {
232           expected.append(", ");
233         }
234         first = false;
235         expected.append(options[i][1]);
236         
237       }
238       list.add(options[i][0]);
239       list.add(use ? "true" : "false");
240     }
241     
242     expected.append("}");
243     assertJQ(req(list.toArray(new String[0])), expected.toString());
244   }
245 
246   @Test
247   public void testPerField() throws Exception {
248     assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true"
249         ,TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true"
250         ,TermVectorParams.FIELDS, "test_basictv,test_notv,test_postv,test_offtv,test_posofftv,test_posoffpaytv"
251         ,"f.test_posoffpaytv." + TermVectorParams.PAYLOADS, "false"
252         ,"f.test_posofftv." + TermVectorParams.POSITIONS, "false"
253         ,"f.test_offtv." + TermVectorParams.OFFSETS, "false"
254         ,"f.test_basictv." + TermVectorParams.DF, "false"
255         ,"f.test_basictv." + TermVectorParams.TF, "false"
256         ,"f.test_basictv." + TermVectorParams.TF_IDF, "false"
257         )
258     ,"/termVectors/0/test_basictv=={'anoth':{},'titl':{}}"
259     ,"/termVectors/0/test_postv/anoth=={'tf':1, 'positions':{'position':5}, 'df':2, 'tf-idf':0.5}"
260     ,"/termVectors/0/test_offtv/anoth=={'tf':1, 'df':2, 'tf-idf':0.5}"
261     ,"/termVectors/warnings=={ 'noTermVectors':['test_notv'], 'noPositions':['test_basictv', 'test_offtv'], 'noOffsets':['test_basictv', 'test_postv']}"
262     );
263   }
264 
265   @Test
266   public void testPayloads() throws Exception {
267     // This field uses TokenOffsetPayloadTokenFilter, which
268     // stuffs start (20) and end offset (27) into the
269     // payload:
270     assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true"
271                  , TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true",
272                  TermVectorParams.PAYLOADS, "true")
273        ,"/termVectors/0/test_posoffpaytv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':5}, 'payloads':{'payload': 'AAAAFAAAABs='}, 'df':2, 'tf-idf':0.5}"
274     );
275   }
276 }
277 
278 
/*
 * Field definitions used by the tests above (presumably taken from the test schema;
 * the definition of test_posoffpaytv is not listed here):
 *
 *   <field name="test_basictv" type="text" termVectors="true"/>
 *   <field name="test_notv" type="text" termVectors="false"/>
 *   <field name="test_postv" type="text" termVectors="true" termPositions="true"/>
 *   <field name="test_offtv" type="text" termVectors="true" termOffsets="true"/>
 *   <field name="test_posofftv" type="text" termVectors="true"
 *     termPositions="true" termOffsets="true"/>
 */